import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Load the Virginia EV charging-station workbook into a DataFrame.
virginia = pd.read_excel("Virginia_EV.xlsx")
# Preview the first rows to check the columns that were read.
virginia.head()
| Fuel Type Code | City | State | ZIP | EV Level2 EVSE Num | EV DC Fast Count | EV Network | Geocode Status | Latitude | Longitude | ... | ID | Updated At | Owner Type Code | Open Date | EV Connector Types | Country | Groups With Access Code (French) | Access Code | Facility Type | EV Pricing | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | ELEC | Floyd | VA | 24091 | 1.0 | NaN | Non-Networked | 200-9 | 36.910575 | -80.317360 | ... | 39514 | 2022-04-28 20:01:31 UTC | P | 2011-06-01 | J1772 | US | Public | public | HOTEL | Free for guests; $10 for non-guests |
| 1 | ELEC | Richmond | VA | 23284 | 2.0 | NaN | Non-Networked | GPS | 37.551409 | -77.452330 | ... | 39574 | 2021-03-11 23:22:17 UTC | SG | 2011-05-15 | J1772 | US | Public | public | COLLEGE_CAMPUS | Free |
| 2 | ELEC | Richmond | VA | 23298 | 2.0 | NaN | Non-Networked | GPS | 37.543387 | -77.429530 | ... | 39575 | 2022-02-10 19:42:29 UTC | SG | 2011-05-15 | J1772 | US | Privé | private | COLLEGE_CAMPUS | NaN |
| 3 | ELEC | Alexandria | VA | 22304 | 3.0 | 1.0 | Non-Networked | 200-9 | 38.809675 | -77.122192 | ... | 39763 | 2022-03-07 19:49:53 UTC | P | 2011-03-15 | CHADEMO J1772 J1772COMBO | US | Public - Appeler à l'avance | public | CAR_DEALER | Free |
| 4 | ELEC | Chantilly | VA | 20151 | 2.0 | 1.0 | Non-Networked | 200-9 | 38.899751 | -77.460168 | ... | 39764 | 2022-03-07 19:49:53 UTC | P | 2011-03-15 | CHADEMO J1772 | US | Public - Appeler à l'avance | public | CAR_DEALER | Free |
5 rows × 21 columns
#pip install geopandas
### GeoPandas is a Python library for handling shapefiles.
import geopandas as gpd
import matplotlib.pyplot as plt
import plotly.express as px
######################################### Load Virginia shapefile ######################################################
#################### Link to shape file: https://www.naturalearthdata.com/downloads/110m-cultural-vectors/110m-admin-1-states-provinces/
# NOTE(review): the path below names the "populated places" layer while the link
# above is the admin-1 states/provinces layer -- confirm which one is intended.
shapefile_path = 'C:\\Users\\Harinath\\Downloads\\ne_110m_populated_places\\ne_110m_populated_places.shp'
virginia_gpd = gpd.read_file(shapefile_path)

# Reload the station workbook.
virginia = pd.read_excel("Virginia_EV.xlsx")

###################################### Plot EV stations in Virginia using plotly ##############################################
# Marker colours keyed by the value of the 'EV Network' column.
network_colors = {'Electrify America': 'red', 'Other Providers': 'blue'}
# Initial map centre passed to the mapbox view.
map_center = {'lat': 38.0037, 'lon': -79.4588}

fig = px.scatter_mapbox(
    virginia,
    lat='Latitude',
    lon='Longitude',
    color='EV Network',
    color_discrete_map=network_colors,
    hover_data={'EV Network': True},
    mapbox_style='carto-positron',
    zoom=6,
    center=map_center,
)
fig.update_layout(
    title='EV Stations in Virginia',
    margin={"r": 0, "t": 30, "l": 0, "b": 0},
)
fig.show()

# Column names, dtypes and non-null counts of the station table.
virginia.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1210 entries, 0 to 1209 Data columns (total 21 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Fuel Type Code 1210 non-null object 1 City 1210 non-null object 2 State 1210 non-null object 3 ZIP 1210 non-null object 4 EV Level2 EVSE Num 1051 non-null float64 5 EV DC Fast Count 205 non-null float64 6 EV Network 1210 non-null object 7 Geocode Status 1210 non-null object 8 Latitude 1210 non-null float64 9 Longitude 1210 non-null float64 10 Date Last Confirmed 1208 non-null datetime64[ns] 11 ID 1210 non-null int64 12 Updated At 1210 non-null object 13 Owner Type Code 617 non-null object 14 Open Date 1208 non-null datetime64[ns] 15 EV Connector Types 1210 non-null object 16 Country 1210 non-null object 17 Groups With Access Code (French) 1210 non-null object 18 Access Code 1210 non-null object 19 Facility Type 562 non-null object 20 EV Pricing 579 non-null object dtypes: datetime64[ns](2), float64(4), int64(1), object(14) memory usage: 198.6+ KB
from sklearn.cluster import KMeans
from sklearn.metrics import pairwise_distances_argmin_min
from sklearn.preprocessing import OneHotEncoder

# Flag stations whose connector list is exactly 'J1772' (the most common type) with 1;
# every other connector combination becomes NaN.
# NOTE(review): this helper column is not one of the clustering features below,
# which one-hot encode the raw 'EV Connector Types' column instead.
virginia['EV Connector Type'] = virginia['EV Connector Types'].map({'J1772': 1})

# Keep only hotels, college campuses, shopping malls, parking garages and office
# buildings, since people tend to visit these places often. Any other facility
# type (or a missing one) maps to NaN. This overwrites the original text column.
virginia['Facility Type'] = virginia['Facility Type'].map(
    {'HOTEL': 1, 'COLLEGE_CAMPUS': 2, 'SHOPPING_MALL': 3, 'PARKING_GARAGE': 4, 'OFFICE_BLDG': 5}
)

features = ['Latitude', 'Longitude', 'EV Connector Types', 'Facility Type']
cluster_data = virginia[features]

####################### one-hot encoding for categorical variables ###################################
categorical_cols = ['EV Connector Types', 'Facility Type']
numeric_cols = ['Latitude', 'Longitude']

# scikit-learn 1.2 renamed `sparse` to `sparse_output` and 1.4 removed the old
# name; try the new keyword first and fall back for older installations.
try:
    encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    encoder = OneHotEncoder(sparse=False)
encoded_features = encoder.fit_transform(cluster_data[categorical_cols])
encoded_feature_names = encoder.get_feature_names_out(categorical_cols)

# Combine encoded features with numerical features. Reusing virginia's index keeps
# the coordinate assignment row-aligned even when that index is not 0..n-1.
features_encoded = pd.DataFrame(
    encoded_features, columns=encoded_feature_names, index=virginia.index
)
features_encoded[numeric_cols] = cluster_data[numeric_cols]
features_encoded
| EV Connector Types_CHADEMO | EV Connector Types_CHADEMO J1772 | EV Connector Types_CHADEMO J1772 J1772COMBO | EV Connector Types_CHADEMO J1772COMBO | EV Connector Types_J1772 | EV Connector Types_J1772 J1772COMBO | EV Connector Types_J1772 NEMA1450 | EV Connector Types_J1772 NEMA515 | EV Connector Types_J1772 NEMA520 | EV Connector Types_J1772 TESLA | ... | EV Connector Types_J1772COMBO TESLA | EV Connector Types_TESLA | Facility Type_1.0 | Facility Type_2.0 | Facility Type_3.0 | Facility Type_4.0 | Facility Type_5.0 | Facility Type_nan | Latitude | Longitude | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 36.910575 | -80.317360 |
| 1 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 37.551409 | -77.452330 |
| 2 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 37.543387 | -77.429530 |
| 3 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 38.809675 | -77.122192 |
| 4 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 38.899751 | -77.460168 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1205 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 38.038041 | -78.491176 |
| 1206 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 38.856791 | -77.112017 |
| 1207 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 38.405395 | -78.906390 |
| 1208 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 36.866183 | -76.411139 |
| 1209 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 37.155661 | -76.562663 |
1210 rows × 21 columns
# Display the combined feature matrix (one-hot columns plus Latitude/Longitude) again.
features_encoded
| EV Connector Types_CHADEMO | EV Connector Types_CHADEMO J1772 | EV Connector Types_CHADEMO J1772 J1772COMBO | EV Connector Types_CHADEMO J1772COMBO | EV Connector Types_J1772 | EV Connector Types_J1772 J1772COMBO | EV Connector Types_J1772 NEMA1450 | EV Connector Types_J1772 NEMA515 | EV Connector Types_J1772 NEMA520 | EV Connector Types_J1772 TESLA | ... | EV Connector Types_J1772COMBO TESLA | EV Connector Types_TESLA | Facility Type_1.0 | Facility Type_2.0 | Facility Type_3.0 | Facility Type_4.0 | Facility Type_5.0 | Facility Type_nan | Latitude | Longitude | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 36.910575 | -80.317360 |
| 1 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 37.551409 | -77.452330 |
| 2 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 37.543387 | -77.429530 |
| 3 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 38.809675 | -77.122192 |
| 4 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 38.899751 | -77.460168 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1205 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 38.038041 | -78.491176 |
| 1206 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 38.856791 | -77.112017 |
| 1207 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 38.405395 | -78.906390 |
| 1208 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 36.866183 | -76.411139 |
| 1209 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 37.155661 | -76.562663 |
1210 rows × 21 columns
###################################### Setting the number of clusters ##########################################
### Since the data set is very small, 8 clusters are chosen directly as a starting point; with a larger
### dataset an elbow plot can be used to pick the number instead.
# n_init and random_state are pinned so the cluster labels -- and therefore the
# suggestions printed below -- are the same on every run.
kmeans = KMeans(n_clusters=8, n_init=10, random_state=42)

############################# Assign the cluster labels to the column "Cluster" #############################
virginia['Cluster'] = kmeans.fit_predict(features_encoded)

############################ Separating Electrify America's stations from other providers ####################################
electrify_america = virginia[virginia['EV Network'] == 'Electrify America']
other_providers = virginia[virginia['EV Network'] != 'Electrify America']

# Clusters that contain competitor stations but no Electrify America station,
# in the order they first appear among the competitor rows.
ea_cluster_ids = set(electrify_america['Cluster'])
missing_clusters = [
    cluster_id
    for cluster_id in other_providers['Cluster'].unique()
    if cluster_id not in ea_cluster_ids
]

###################### Generate suggestions for new Electrify America stations within missing clusters ########################
# One suggestion per missing cluster: the mean coordinate of the competitor
# stations that fall in that cluster.
suggested_stations = []
for cluster_id in missing_clusters:
    members = other_providers[other_providers['Cluster'] == cluster_id]
    suggested_stations.append((members['Latitude'].mean(), members['Longitude'].mean()))

print("Suggested new stations for Electrify America:")
for station in suggested_stations:
    print(station)
Suggested new stations for Electrify America: (37.55729195588232, -77.52949545147378) (38.8663456309343, -77.27613971807108) (38.342812017191164, -77.73928477465013)
# Number of suggested sites (one per cluster that has no Electrify America station).
len(suggested_stations) ### we can increase this number
3
#pip install geopy
# suggested_stations['Facility Type']
import os

# API key for the geocoding service (presumably the Google Maps Geocoding key
# used by the GoogleV3 geolocator -- confirm). It is read from the
# GOOGLE_MAPS_API_KEY environment variable so the secret does not have to be
# pasted into the notebook; when the variable is unset the value is the same
# empty string as before.
api_key = os.environ.get("GOOGLE_MAPS_API_KEY", "")
import geopandas as gpd
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from sklearn.cluster import KMeans
from sklearn.metrics import pairwise_distances_argmin_min
import numpy as np
from geopy.geocoders import GoogleV3
# Boolean mask marking the rows whose network is Electrify America.
is_electrify_america = virginia['EV Network'].eq('Electrify America')

# Independent copies, so columns added to them later do not write into `virginia`.
ea_stations = virginia.loc[is_electrify_america].copy()
other_stations = virginia.loc[~is_electrify_america].copy()

############################################# Electrify America's Stations #################################################
# Preview the first ten Electrify America rows.
ea_stations.head(10)
| Fuel Type Code | City | State | ZIP | EV Level2 EVSE Num | EV DC Fast Count | EV Network | Geocode Status | Latitude | Longitude | ... | Owner Type Code | Open Date | EV Connector Types | Country | Groups With Access Code (French) | Access Code | Facility Type | EV Pricing | EV Connector Type | Cluster | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 427 | ELEC | Bristol | VA | 24202 | NaN | 4.0 | Electrify America | GPS | 36.635755 | -82.127622 | ... | NaN | 2019-05-17 | CHADEMO J1772COMBO | US | Public | public | NaN | NaN | NaN | 3 |
| 451 | ELEC | Falls Church | VA | 22044 | NaN | 6.0 | Electrify America | GPS | 38.867351 | -77.142683 | ... | NaN | 2019-10-19 | CHADEMO J1772COMBO | US | Public | public | NaN | NaN | NaN | 7 |
| 490 | ELEC | Alexandria | VA | 22306 | 1.0 | 3.0 | Electrify America | GPS | 38.742521 | -77.086696 | ... | NaN | 2019-11-14 | CHADEMO J1772 J1772COMBO | US | Public | public | NaN | NaN | NaN | 7 |
| 495 | ELEC | Vienna | VA | 22182 | NaN | 4.0 | Electrify America | GPS | 38.930184 | -77.245534 | ... | NaN | 2019-12-07 | CHADEMO J1772COMBO | US | Public | public | NaN | NaN | NaN | 7 |
| 554 | ELEC | Haymarket | VA | 20169 | NaN | 4.0 | Electrify America | GPS | 38.819204 | -77.645886 | ... | NaN | 2020-04-30 | CHADEMO J1772COMBO | US | Public | public | NaN | NaN | NaN | 7 |
| 565 | ELEC | Fredericksburg | VA | 22407 | NaN | 4.0 | Electrify America | GPS | 38.293961 | -77.512350 | ... | NaN | 2020-07-01 | CHADEMO J1772COMBO | US | Public | public | NaN | NaN | NaN | 7 |
| 572 | ELEC | Reston | VA | 20191 | NaN | 4.0 | Electrify America | GPS | 38.950829 | -77.358475 | ... | NaN | 2020-07-09 | CHADEMO J1772COMBO | US | Public | public | NaN | NaN | NaN | 7 |
| 578 | ELEC | Springfield | VA | 22150 | 1.0 | 3.0 | Electrify America | GPS | 38.775894 | -77.172441 | ... | NaN | 2020-07-25 | CHADEMO J1772 J1772COMBO | US | Public | public | NaN | NaN | NaN | 7 |
| 582 | ELEC | Sterling | VA | 20166 | NaN | 4.0 | Electrify America | GPS | 38.977930 | -77.425894 | ... | NaN | 2020-08-26 | CHADEMO J1772COMBO | US | Public | public | NaN | NaN | NaN | 7 |
| 583 | ELEC | Fairfax | VA | 22030 | NaN | 4.0 | Electrify America | GPS | 38.861694 | -77.275739 | ... | NaN | 2020-08-26 | CHADEMO J1772COMBO | US | Public | public | NaN | NaN | NaN | 7 |
10 rows × 23 columns
############################## Readable labels for geocoder place-type codes ###################################
# (type code, display label) pairs; get_location_details looks the first entry of
# a geocoding result's 'types' list up in the resulting dict.
_type_label_pairs = [
    ('street_address', 'Street Address'),
    ('premise', 'Premise'),
    ('car_repair', 'Car Repair'),
    ('establishment', 'Establishment'),
    ('car_dealer', 'Car Dealer'),
]
type_mapping = dict(_type_label_pairs)
################################ Perform clustering on other EV network stations ###########################################
coord_cols = ['Latitude', 'Longitude']
# n_init / random_state are pinned so the cluster labels are reproducible.
kmeans = KMeans(n_clusters=10, n_init=10, random_state=42)  # Increased number of clusters to 10
other_stations['Cluster'] = kmeans.fit_predict(other_stations[coord_cols])

################################ Finding the cluster centers for other stations ###########################################
# For each cluster centre: positional index (into other_stations) of the closest station.
cluster_centers, _ = pairwise_distances_argmin_min(kmeans.cluster_centers_, other_stations[coord_cols])

############### Distance from every other-network station to its nearest Electrify America station #################
# pairwise_distances_argmin_min(X, Y) returns one (index, distance) entry per row
# of X, so the competitor stations must be X: the distances are then positionally
# aligned with other_stations, which is what the .iloc lookup below requires.
# (With the arguments the other way round there is one entry per Electrify America
# station and the positions do not refer to rows of other_stations.)
# Distances are plain Euclidean distances in latitude/longitude degrees.
ea_distances = pairwise_distances_argmin_min(other_stations[coord_cols], ea_stations[coord_cols])

########################################### Let's select suggested stations ################################################
################################ Select 15 stations farthest from existing stations ########################################
# argsort is ascending, so the last 15 positions are the largest distances.
suggested_indices = np.argsort(ea_distances[1])[-15:]
# .copy() gives an independent frame, so adding columns to it later does not
# raise pandas' SettingWithCopyWarning.
suggested_stations = other_stations.iloc[suggested_indices].copy()
################################# Initialize geolocator with Google Maps Geocoding API key #################################
# Use the module-level `api_key` variable instead of a second hard-coded empty
# string, so the key only has to be set in one place.
geolocator = GoogleV3(api_key=api_key)
################################## Function to get location details and facility type ######################################
def get_location_details(latitude, longitude):
    """Reverse-geocode a coordinate pair into an address and a facility label.

    The lookup goes through the module-level ``geolocator``; the first entry of
    the result's ``types`` list is translated with ``type_mapping``.

    Args:
        latitude: Latitude of the point to look up.
        longitude: Longitude of the point to look up.

    Returns:
        ``None`` when the geocoder returns no result. Otherwise a dict with
        ``'Location Details'`` (the resolved address) and ``'Facility Type'``:
        the label from ``type_mapping``, the raw type code when the code has
        no entry there, or ``None`` when the result lists no types.
    """
    location = geolocator.reverse((latitude, longitude), exactly_one=True)
    if not location:
        return None

    # First entry of the 'types' list in the raw geocoding response, if any.
    raw_type = next(iter(location.raw.get('types', [])), None)
    return {
        'Location Details': location.address,
        # Return the readable label rather than the raw code, so that
        # type_mapping actually takes effect; unmapped codes pass through.
        'Facility Type': type_mapping.get(raw_type, raw_type),
    }
########## Look up location details for the existing and the suggested stations and store them as two columns ############
_DETAIL_COLUMNS = ['Location Details', 'Facility Type']


def _geocode_details(stations):
    """Return a two-column frame of location details aligned to ``stations``' index.

    A lookup that yields ``None`` becomes a row of missing values, so the result
    always has both columns, even when every lookup fails.
    """
    details = [
        get_location_details(lat, lon) or dict.fromkeys(_DETAIL_COLUMNS)
        for lat, lon in zip(stations['Latitude'], stations['Longitude'])
    ]
    return pd.DataFrame(details, index=stations.index, columns=_DETAIL_COLUMNS)


ea_stations[_DETAIL_COLUMNS] = _geocode_details(ea_stations)

# Work on an independent copy: suggested_stations was selected out of
# other_stations, and assigning columns to such a selection triggers pandas'
# SettingWithCopyWarning.
suggested_stations = suggested_stations.copy()
suggested_stations[_DETAIL_COLUMNS] = _geocode_details(suggested_stations)
####################################### EV stations in Virginia using Plotly Express ########################################
# HTML template for the hover label shared by both extra traces.
_hover_template = '<b>EV Network</b>: {}<br><b>Facility</b>: {}<br><b>Location Details</b>: {}'


def _hover_labels(stations):
    """Build one hover label per row from network, facility type and address."""
    rows = zip(stations['EV Network'], stations['Facility Type'], stations['Location Details'])
    return [_hover_template.format(*row) for row in rows]


# Base layer: every station in the dataset, coloured by network.
fig = px.scatter_mapbox(
    virginia,
    lat='Latitude',
    lon='Longitude',
    color='EV Network',
    color_discrete_map={'Electrify America': 'red', 'Other Providers': 'blue'},
    hover_data={'EV Network': True, 'Facility Type': True},  # Add 'Facility Type' to hover data
    mapbox_style='carto-positron',
    zoom=6,
    center={'lat': 38.0037, 'lon': -79.4588},
)

########################## Trace for the existing Electrify America stations #########################################
existing_trace = go.Scattermapbox(
    lat=ea_stations['Latitude'],
    lon=ea_stations['Longitude'],
    mode='markers',
    marker=dict(size=10, color='red'),
    name='Existing Electrify America Stations',
    hoverinfo='text',
    text=_hover_labels(ea_stations),
)
fig.add_trace(existing_trace)

########################## Trace for the suggested Electrify America stations #########################################
suggested_trace = go.Scattermapbox(
    lat=suggested_stations['Latitude'],
    lon=suggested_stations['Longitude'],
    mode='markers',
    marker=dict(symbol='circle', size=10, color='blue'),  # Circle symbol, drawn in blue
    name='New Electrify America Stations',
    hoverinfo='text',
    text=_hover_labels(suggested_stations),
)
fig.add_trace(suggested_trace)

# Title and margins, then render the figure.
fig.update_layout(title='EV Stations in Virginia', margin={"r": 0, "t": 30, "l": 0, "b": 0})
fig.show()
C:\Users\Harinath\AppData\Local\Temp\ipykernel_720\2524339197.py:49: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy